Clueing in DC: An Analysis of DC Crime Data from 2018-2022
DSAN 5200 Final Project
Authors
Affiliation
Brian Kwon
Georgetown University
Powell Sheagren
Dheeraj Oruganty
Published
April 29, 2024
Introduction
Figure 1: Comparing Crime Across Cities in 2019
Code
library(rvest)library(tidyverse)library(plotly)# Parse 2019 crime rate data from wikipediaurl ="https://en.wikipedia.org/wiki/List_of_United_States_cities_by_crime_rate"page =read_html(url)tables =html_table(page, fill =TRUE)crime_data = tables[[1]]# Preprocess the datasetcolnames(crime_data) = crime_data[2, ]crime_data = crime_data[-c(1,2), ]crime_data = crime_data %>%select(1,2,3,4) # Remove unnecessary columnscolnames(crime_data) =c("state", "city", "population", "crime_rate") # Change column namescrime_data$population =as.numeric(gsub(",", "", crime_data$population)) # Change to numericcrime_data$crime_rate =as.numeric(crime_data$crime_rate) # Change to numeric# Leave only one city per state by populationcrime_data = crime_data %>%group_by(state) %>%slice(which.max(population))# Remove footnote numbercrime_data$city =gsub("\\d+$", "", crime_data$city)crime_data$state =gsub("\\d+$", "", crime_data$state)# Change some city name manually for mergingcrime_data = crime_data %>%mutate(city =if_else(city =="Washington, D.C.", "Washington", city)) %>%mutate(city =if_else(city =="Louisville Metro", "Louisville", city))# Get latitude and longitude datalocation =read.csv("./data/uscities.csv")location = location %>%select(1,4,lat,lng)# Merge two data setsdf =merge(crime_data, location, by ="city")df = df %>%filter(state == state_name) %>%select(-state_name)# Color palette# colors = c("#F1FAEE", "#A8DADC", "#457B9D", "#1D3557")# colors = c("#ccdbdc","#9ad1d4","#80ced7","#007ea7","#003249")colors =c("#ccdbdc","#edf8b1", "#7fcdbb", "#2c7fb8")# colors = c("#caf0f8", "#ade8f4", "#90e0ef", "#48cae4", "#00b4d8", "#0096c7", "#0077b6", "#023e8a", "#03045e") # Plot bubble mapmap =plot_geo(df, lat =~lat, lon =~lng) %>%add_markers(text =~paste("State: ", state, "<br>City: ", city, "<br>Crime Rate: ", crime_rate, "<br>Population: ", population), size =~population, color =~crime_rate,colors = colors,opacity =10000,marker =list(sizemode ='area', sizeref =0.2, line =list(color ='black', width =2))) %>%colorbar(title ="Crime Rate") %>%layout(title ='Crime Rate Bubble Map for US cities in 2019', geo =list(scope ='usa'),annotations =list(list(x =0.8, y =0.55, text ="Washington D.C.", showarrow =TRUE, xanchor ='left', yanchor ='bottom', ax =30, ay =30, font =list(size =12, color ="red")),list(x =1, y =0, text ="Size by population", showarrow =FALSE, xanchor='right', yanchor='auto', xshift=0, yshift=0, font=list(size=12, color="grey"))))map
Figure 1: This map represents the relative population and crime rates in the U.S. cities. The size of the dots shows the population of the city and the color represents the amount of crime per 100,000 people.
Figure 2: Crime Proportions in DC From 2018-2022
Code
library(tidyverse)library(DT)# Read data filesoffense_22 =read.csv("./data/DC-2022/NIBRS_OFFENSE.csv") offense_21 =read.csv("./data/DC-2021/NIBRS_OFFENSE.csv") offense_20 =read.csv("./data/DC-2020/NIBRS_OFFENSE.csv") offense_19 =read.csv("./data/DC-2019/NIBRS_OFFENSE.csv") offense_18 =read.csv("./data/DC-2018/NIBRS_OFFENSE.csv") offense_code1 =read.csv("./data/DC-2022/NIBRS_OFFENSE_TYPE.csv")offense_code2 =read.csv("./data/DC-2018/NIBRS_OFFENSE_TYPE.csv")# Merge with code files for corresponding offense namesoffense_22 =merge(offense_22, offense_code1, by ="offense_code")offense_21 =merge(offense_21, offense_code1, by ="offense_code")offense_20 =merge(offense_20, offense_code2, by ="OFFENSE_TYPE_ID")offense_19 =merge(offense_19, offense_code2, by ="OFFENSE_TYPE_ID")offense_18 =merge(offense_18, offense_code2, by ="OFFENSE_TYPE_ID")# Calculate the percentage based on the countoffense_22_count =as.data.frame(round(table(offense_22$offense_name)/nrow(offense_22)*100,2))offense_21_count =as.data.frame(round(table(offense_21$offense_name)/nrow(offense_21)*100,2))offense_20_count =as.data.frame(round(table(offense_20$OFFENSE_NAME)/nrow(offense_20)*100,2))offense_19_count =as.data.frame(round(table(offense_19$OFFENSE_NAME)/nrow(offense_19)*100,2))offense_18_count =as.data.frame(round(table(offense_18$OFFENSE_NAME)/nrow(offense_18)*100,2))# Merge all yearsoffense_df =merge(merge(merge(merge(offense_18_count, offense_19_count, by ="Var1", all =TRUE), offense_20_count, by ="Var1", all =TRUE), offense_21_count, by ="Var1", all =TRUE), offense_22_count, by ="Var1", all =TRUE)colnames(offense_df) =c("Offense Type", "2018", "2019", "2020", "2021", "2022")# Create datatabledatatable(data = offense_df, caption ="Table", filter ="top")
Figure 2: Data was collected from the 2018-2022 FBI’s National Incident-Based Reporting System. The values are the percentage of a total crime that an individual offense made up.
Figure 3: Relationships Between Offenses Within Incidents
Code
import plotly.graph_objects as goimport numpy as npimport networkx as nx## Code for this graph generously donated from:# https://plotly.com/python/network-graphs/## importing matrixmatrix = np.genfromtxt('./data/adjacency_matrix.csv', delimiter =",")# list = np.genfromtxt('Adjacency_list.csv', delimiter = ",")## Turning adjacency matrix to graph obkectG = nx.from_numpy_array(matrix,create_using=nx.DiGraph)## Using a spiral layout to show centralitypos = nx.spiral_layout(G)## Adding position based on the layoutfor i inrange(0,42):for g inrange(0,42): G.nodes[i]['pos'] = pos[i] G.nodes[g]['pos'] = pos[g]## Adding edges togetheredge_x = []edge_y = []for edge in G.edges(): x0, y0 = G.nodes[edge[0]]['pos'] x1, y1 = G.nodes[edge[1]]['pos'] edge_x.append(x0) edge_x.append(x1) edge_x.append(None) edge_y.append(y0) edge_y.append(y1) edge_y.append(None)## arranging them into linesedge_trace = go.Scatter( x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines')## adding nodes to graphnode_x = []node_y = []for node in G.nodes(): x, y = pos[node] node_x.append(x) node_y.append(y)## assembly againnode_trace = go.Scatter( x=node_x, y=node_y, mode='markers', hoverinfo='text', marker=dict( showscale=True,# colorscale options#'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |#'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |#'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' | colorscale = ["#E63946", "#F1FAEE", "#A8DADC", "#457B9D", "#1D3557"], reversescale=True, color=[], size=10, colorbar=dict( thickness=15, title='Node Connections', xanchor='left', titleside='right' ), line_width=2))## Offenses for tooltipoffenses_list = [ "Destruction/Damage/Vandalism of Property", "Theft From Motor Vehicle" , "Robbery" , "Simple Assault" , "Intimidation" , "All Other Larceny" , "Motor Vehicle Theft" , "Drug Equipment Violations" , "Drug/Narcotic Violations" , "Weapon Law Violations" , "Stolen Property Offenses" , "Aggravated Assault" , "Purse-snatching" , "Extortion/Blackmail" , "Theft From Building" , "Fondling" , "Counterfeiting/Forgery" , "Theft of Motor Vehicle Parts or Accessories","Credit Card/Automated Teller Machine Fraud" , "Impersonation" , "Pocket-picking" , "Kidnapping/Abduction" , "False Pretenses/Swindle/Confidence Game" , "Burglary/Breaking & Entering" , "Rape" , "Murder and Nonnegligent Manslaughter" , "Theft From Coin-Operated Machine or Device" , "Animal Cruelty" , "Shoplifting" , "Hacking/Computer Invasion" , "Identity Theft" , "Wire Fraud" , "Arson" , "Betting/Wagering" , "Welfare Fraud" , "Pornography/Obscene Material" , "Bribery" , "Purchasing Prostitution" , "Prostitution" , "Sodomy" , "Sexual Assault With An Object", "Other"]# getting tooltipnode_adjacencies = []node_text = []for node, adjacencies inenumerate(G.adjacency()): node_adjacencies.append(len(adjacencies[1])) node_text.append('# of connections: '+str(len(adjacencies[1])) +" | Offense Type: "+ offenses_list[node])node_trace.marker.color = node_adjacenciesnode_trace.text = node_text## Plotting the figurefig = go.Figure(data=[edge_trace, node_trace], layout=go.Layout( title='Amount of times an Offense is Listed with other Ofenses', titlefont_size=16, showlegend=False, hovermode='closest', margin=dict(b=20,l=5,r=5,t=40), annotations=[ dict( text="", showarrow=False, xref="paper", yref="paper", x=0.005, y=-0.002 ) ], xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) ))# fig.update_traces(marker = dict(size = node_adjacencies));fig.update_traces(marker=dict(size=node_adjacencies, colorbar_title='Interaction Count'));fig.show()
Figure 3: The network diagram shows data from all years where one incident involved multiple offenses. The lines represent connections between crimes and the size of the nodes mean the amount of times the crime was connected to another.
Figure 4: Offense by Relationship to Victim Heatmap
Code
library(tidyverse)library(plotly)library(heatmaply)## 2018offense_data_2018 <-read.csv("data/DC-2018/NIBRS_OFFENSE.csv") %>%mutate(year =2018)offense_2018 <-read.csv("data/DC-2018/NIBRS_OFFENSE_TYPE.csv")victim_data_2018 <-read.csv("data/DC-2018/NIBRS_VICTIM.csv") %>%mutate(year =2018)relation_2018 <-read.csv("data/DC-2018/NIBRS_VICTIM_OFFENDER_REL.csv")relationship_2018 <-read.csv("data/DC-2018/NIBRS_RELATIONSHIP.csv")offense_data_2018 <-left_join(offense_data_2018,offense_2018, by ="OFFENSE_TYPE_ID")relation_2018 <-left_join(relation_2018,relationship_2018, by ="RELATIONSHIP_ID")victim_data_2018 <-right_join(victim_data_2018,relation_2018, by ="VICTIM_ID")total_data_2018 <-left_join(victim_data_2018,offense_data_2018, by =c("INCIDENT_ID","year"))total_data_2018 <- total_data_2018 %>%select(c(RELATIONSHIP_NAME,OFFENSE_CATEGORY_NAME, year))## 2019offense_data_2019 <-read.csv("data/DC-2019/NIBRS_OFFENSE.csv") %>%mutate(year =2019)offense_2019 <-read.csv("data/DC-2019/NIBRS_OFFENSE_TYPE.csv")victim_data_2019 <-read.csv("data/DC-2019/NIBRS_VICTIM.csv") %>%mutate(year =2019)relation_2019 <-read.csv("data/DC-2019/NIBRS_VICTIM_OFFENDER_REL.csv")relationship_2019 <-read.csv("data/DC-2019/NIBRS_RELATIONSHIP.csv")offense_data_2019 <-left_join(offense_data_2019,offense_2019, by ="OFFENSE_TYPE_ID")relation_2019 <-left_join(relation_2019,relationship_2019, by ="RELATIONSHIP_ID")victim_data_2019 <-right_join(victim_data_2019,relation_2019, by ="VICTIM_ID")total_data_2019 <-left_join(victim_data_2019,offense_data_2019, c("INCIDENT_ID","year"))total_data_2019 <- total_data_2019 %>%select(c(RELATIONSHIP_NAME,OFFENSE_CATEGORY_NAME, year))## 2020offense_data_2020 <-read.csv("data/DC-2020/NIBRS_OFFENSE.csv") %>%mutate(year =2020)offense_2020 <-read.csv("data/DC-2020/NIBRS_OFFENSE_TYPE.csv")victim_data_2020 <-read.csv("data/DC-2020/NIBRS_VICTIM.csv") %>%mutate(year =2020)relation_2020 <-read.csv("data/DC-2020/NIBRS_VICTIM_OFFENDER_REL.csv")relationship_2020 <-read.csv("data/DC-2020/NIBRS_RELATIONSHIP.csv")offense_data_2020 <-left_join(offense_data_2020,offense_2020, by ="OFFENSE_TYPE_ID")relation_2020 <-left_join(relation_2020,relationship_2020, by ="RELATIONSHIP_ID")victim_data_2020 <-right_join(victim_data_2020,relation_2020, by ="VICTIM_ID")total_data_2020 <-left_join(victim_data_2020,offense_data_2020, by =c("INCIDENT_ID","year"))total_data_2020 <- total_data_2020 %>%select(c(RELATIONSHIP_NAME,OFFENSE_CATEGORY_NAME, year))## 2021offense_data_2021 <-read.csv("data/DC-2021/NIBRS_OFFENSE.csv") %>%mutate(year =2021)offense_2021 <-read.csv("data/DC-2021/NIBRS_OFFENSE_TYPE.csv")victim_data_2021 <-read.csv("data/DC-2021/NIBRS_VICTIM.csv") %>%mutate(year =2021)relation_2021 <-read.csv("data/DC-2021/NIBRS_VICTIM_OFFENDER_REL.csv")relationship_2021 <-read.csv("data/DC-2021/NIBRS_RELATIONSHIP.csv")offense_data_2021 <-left_join(offense_data_2021,offense_2021, by ="offense_code")relation_2021 <-left_join(relation_2021,relationship_2021, by ="relationship_id")victim_data_2021 <-right_join(victim_data_2021,relation_2021, by ="victim_id")total_data_2021 <-left_join(victim_data_2021,offense_data_2021, by =c("incident_id","year"))total_data_2021 <- total_data_2021 %>%select(c(relationship_name,offense_category_name, year))## 2022offense_data_2022 <-read.csv("data/DC-2022/NIBRS_OFFENSE.csv") %>%mutate(year =2022)offense_2022 <-read.csv("data/DC-2022/NIBRS_OFFENSE_TYPE.csv")victim_data_2022 <-read.csv("data/DC-2022/NIBRS_VICTIM.csv") %>%mutate(year =2022)relation_2022 <-read.csv("data/DC-2022/NIBRS_VICTIM_OFFENDER_REL.csv")relationship_2022 <-read.csv("data/DC-2022/NIBRS_RELATIONSHIP.csv")offense_data_2022 <-left_join(offense_data_2022,offense_2022, by ="offense_code")relation_2022 <-left_join(relation_2022,relationship_2022, by ="relationship_id")victim_data_2022 <-right_join(victim_data_2022,relation_2022, by ="victim_id")total_data_2022 <-left_join(victim_data_2022,offense_data_2022, by =c("incident_id","year"))total_data_2022 <- total_data_2022 %>%select(c(relationship_name,offense_category_name, year))## adjusting colnames for differencecolnames(total_data_2021) <-c("RELATIONSHIP_NAME","OFFENSE_CATEGORY_NAME", "year")colnames(total_data_2022) <-c("RELATIONSHIP_NAME","OFFENSE_CATEGORY_NAME", "year")## groupstotal_data_relation <-rbind(total_data_2018, total_data_2019, total_data_2020, total_data_2021, total_data_2022)## relationships store for next chunkrelationships <- total_data_relation$RELATIONSHIP_NAME %>%factor() %>%levels()## Splitting the relationships type into indicies and then filtering by themfamily_relationships_index <-c(6,14,15,16,19,21,22)partner_relationships_index <-c(1,5,7,8,11,12,23,24,25,26)acquaintance_relationships_index <-c(3,4,9,10,13,17,18,20)stranger_relationships_index <-c(27)other_relationships_index <-c(2)family_relationships <- relationships[family_relationships_index]partner_relationships <- relationships[partner_relationships_index]acquaintance_relationships <- relationships[acquaintance_relationships_index]stranger_relationships <- relationships[stranger_relationships_index]other_relationships <- relationships[other_relationships_index]## Function for new column of valuesrelation_checker <-function(value){if(value %in% family_relationships){ val <-"Family" } elseif(value %in% partner_relationships){ val <-"Partner/Partners Family" } elseif(value %in% acquaintance_relationships){ val <-"Acquaintance" } elseif(value %in% stranger_relationships){ val <-"Stranger" } else{ val <-"Other" }}## vectorizing the function and adding the columrelation_checker <-Vectorize(relation_checker)total_data_relation <- total_data_relation %>%mutate(Relation_group =relation_checker(RELATIONSHIP_NAME)) %>%filter(Relation_group !="Other")#total_data_relation$Relation_group %>% table()## Making matrix for Vizmat <- total_data_relation %>%group_by(Relation_group,OFFENSE_CATEGORY_NAME) %>%tally() %>%spread(Relation_group,n) %>%as.data.frame()mat[is.na(mat)] <-0rownames(mat) <- mat$OFFENSE_CATEGORY_NAMEmat <- mat %>%select(-OFFENSE_CATEGORY_NAME)# Color palettecolors =c("#F1FAEE", "#A8DADC", "#457B9D", "#1D3557")# colors = c("#ccdbdc","#edf8b1", "#7fcdbb", "#2c7fb8")## Heatmap codeptotal <-heatmaply(mat,label_names =c("Crime Group", "Relation", "Relation Prevelance"),colors = colors,# width = 800, height =600,dendrogram =FALSE,# limits = c(0,10000),scale ="row",branches_lwd =0.1,# hide_colorbar = TRUE,grid_color ="white",grid_width =0.00001,dend_hoverinfo =FALSE,main ="Heatmap of offense category by relationship between victim and offender")ptotal
Fig 4: This heat map shows the amount, by color, of each offense category scaled by each row and what the relationship of the victim was to the offender for the amount.
Figure 5: Interactive Location of Offenses in 2018, 2020, 2022
Figure 5: Interactive Visualization of Top 15 Crime Incidents by Location in Washington D.C. for the Years 2018, 2020, and 2022
Figure 6: Sankey Diagram of Weapon Type and Injury by Offense
Code
library(tidyverse)library(networkD3)library(htmlwidgets)library(htmltools)# Read all necessary filesoffense_18 =read.csv("./data/DC-2018/NIBRS_OFFENSE.csv") offender_18 =read.csv("./data/DC-2018/NIBRS_OFFENDER.csv") victim_18 =read.csv("./data/DC-2018/NIBRS_VICTIM.csv")weapon_18 =read.csv("./data/DC-2018/NIBRS_WEAPON.csv") injury_18 =read.csv("./data/DC-2018/NIBRS_VICTIM_INJURY.csv")# Select offense_id, incident_id, offender_id, victim_id, offense_code, injury_id, weapon_idoffense_18 = offense_18 %>%select(2,3,4)offender_18 = offender_18 %>%select(2,3)victim_18 = victim_18 %>%select(2,3)weapon_18 = weapon_18 %>%select(2,3)injury_18 = injury_18 %>%select(2,3)# Read codes files for nodesoffense_code =read.csv("./data/DC-2018/NIBRS_OFFENSE_TYPE.csv")injury_code =read.csv("./data/DC-2018/NIBRS_INJURY.csv")weapon_code =read.csv("./data/DC-2018/NIBRS_WEAPON_TYPE.csv")# Get offense_code, offense_type_id, offense_nameoffense_code = offense_code %>%select(1,2,3)# Change offense_type_id to offense_codeoffense_18 =merge(offense_18, offense_code, by ="OFFENSE_TYPE_ID")offense_18 = offense_18 %>%select(2,3,4)# Merge by incident_id, offense_id, victim_iddf_18 =merge(merge(merge(merge(offense_18, offender_18, by ="INCIDENT_ID"), victim_18, by ="INCIDENT_ID"), injury_18, by ="VICTIM_ID"), weapon_18, by ="OFFENSE_ID")# Remove incident_id, offense_id, victim_id, offender_iddf_18 = df_18 %>%select(-1,-2,-3,-5)# # Make column names to lower casecolnames(df_18) =tolower(colnames(df_18))# Paste character to make ids uniquedf_18$injury_id =paste0("i", df_18$injury_id)df_18$weapon_id =paste0("w", df_18$weapon_id)# # Count the unique combinations of offense types and weapon types and subset if there are more than 100 casesfirst_link = df_18 %>%group_by(offense_code, weapon_id) %>%summarise(value =n(), .groups ="drop") %>%arrange(desc(value)) %>%rename(source = offense_code, target = weapon_id) %>%filter(value >100)# # Count the unique combinations of weapon types and injury types and subset if there are more than 100 casessecond_link = df_18 %>%group_by(weapon_id, injury_id) %>%summarise(value =n(), .groups ="drop") %>%arrange(desc(value)) %>%rename(source = weapon_id, target = injury_id) %>%filter(value >100)# # Combine those two linkslinks.df =as.data.frame(rbind(first_link,second_link))# Get the codes and namesoffense_code = offense_code %>%select(2,3) %>%rename(name = OFFENSE_CODE, label = OFFENSE_NAME)injury_code = injury_code %>%select(1,3) %>%rename(name = INJURY_ID, label = INJURY_NAME)weapon_code = weapon_code %>%select(1,3) %>%rename(name = WEAPON_ID, label = WEAPON_NAME)# Make codes uniqueinjury_code$name =paste0("i", injury_code$name)weapon_code$name =paste0("w", weapon_code$name)# Combine all the nodesnodes.df =rbind(offense_code, injury_code, weapon_code)# Subset only nodes from the linksnodes.df = nodes.df %>%filter(name %in%c(unique(first_link$source),unique(first_link$target),unique(second_link$target)))# Create source_id and target_id for a sankey diagramlinks.df$source_id =match(links.df$source, nodes.df$name) -1links.df$target_id =match(links.df$target, nodes.df$name) -1# Color Palettemy_color ='d3.scaleOrdinal().range(["#ccdbdc", "#edf8b1", "#7fcdbb", "#2c7fb8"])'# Create a sankey diagramnet =sankeyNetwork(Links = links.df, Nodes = nodes.df, Source ='source_id', Target ='target_id', Value ='value', NodeID ='label', fontSize =16, colourScale=my_color, iterations =0)# Add a titlenet_with_title =prependContent(net, tags$b(HTML('Injuries and weapon type by offense type in 2018')))net_with_title
Injuries and weapon type by offense type in 2018
Code
library(tidyverse)library(networkD3)library(htmlwidgets)library(htmltools)# Read all necessary filesoffense_22 =read.csv("./data/DC-2022/NIBRS_OFFENSE.csv") offender_22 =read.csv("./data/DC-2022/NIBRS_OFFENDER.csv") victim_22 =read.csv("./data/DC-2022/NIBRS_VICTIM.csv")weapon_22 =read.csv("./data/DC-2022/NIBRS_WEAPON.csv") injury_22 =read.csv("./data/DC-2022/NIBRS_VICTIM_INJURY.csv")# Select offense_id, incident_id, offender_id, victim_id, offense_code, injury_id, weapon_idoffense_22 = offense_22 %>%select(2,3,4)offender_22 = offender_22 %>%select(2,3)victim_22 = victim_22 %>%select(2,3)weapon_22 = weapon_22 %>%select(2,3)injury_22 = injury_22 %>%select(2,3)# Merge by incident_id, offense_id, victim_iddf_22 =merge(merge(merge(merge(offense_22, offender_22, by ="incident_id"), victim_22, by ="incident_id"), injury_22, by ="victim_id"), weapon_22, by ="offense_id")# Remove incident_id, offense_id, victim_id, offender_iddf_22 = df_22 %>%select(-1,-2,-3,-5)# Paste character to make ids uniquedf_22$injury_id =paste0("i", df_22$injury_id)df_22$weapon_id =paste0("w", df_22$weapon_id)# Count the unique combinations of offense types and weapon types and subset if there are more than 100 casesfirst_link = df_22 %>%group_by(offense_code, weapon_id) %>%summarise(value =n(), .groups ="drop") %>%arrange(desc(value)) %>%rename(source = offense_code, target = weapon_id) %>%filter(value >100)# Count the unique combinations of weapon types and injury types and subset if there are more than 100 casessecond_link = df_22 %>%group_by(weapon_id, injury_id) %>%summarise(value =n(), .groups ="drop") %>%arrange(desc(value)) %>%rename(source = weapon_id, target = injury_id) %>%filter(value >100)# Combine those two linkslinks.df =as.data.frame(rbind(first_link,second_link))# Read codes files for nodesoffense_code =read.csv("./data/DC-2022/NIBRS_OFFENSE_TYPE.csv")injury_code =read.csv("./data/DC-2022/NIBRS_INJURY.csv")weapon_code =read.csv("./data/DC-2022/NIBRS_WEAPON_TYPE.csv")# Get the codes and namesoffense_code = offense_code %>%select(1,2) %>%rename(name = offense_code, label = offense_name)injury_code = injury_code %>%select(1,3) %>%rename(name = injury_id, label = injury_name)weapon_code = weapon_code %>%select(1,3) %>%rename(name = weapon_id, label = weapon_name)# Make codes uniqueinjury_code$name =paste0("i", injury_code$name)weapon_code$name =paste0("w", weapon_code$name)# Combine all the nodesnodes.df =rbind(offense_code, injury_code, weapon_code)# Subset only nodes from the linksnodes.df = nodes.df %>%filter(name %in%c(unique(first_link$source),unique(first_link$target),unique(second_link$target)))# Create source_id and target_id for a sankey diagramlinks.df$source_id =match(links.df$source, nodes.df$name) -1links.df$target_id =match(links.df$target, nodes.df$name) -1# Color Palettemy_color ='d3.scaleOrdinal().range(["#F1FAEE", "#A8DADC", "#457B9D", "#1D3557"])'# Color groupings# nodes.df = nodes.df %>%# mutate(group = ifelse(name == "13B", "a",# ifelse(name == "13A", "b", # ifelse(name == "120", "c", "g")))) %>%# mutate(group = ifelse(name == "w41", "e", "g")) %>%# mutate(group = ifelse(name == "i4", "f", "g"))# Create a sankey diagramnet =sankeyNetwork(Links = links.df, Nodes = nodes.df, Source ='source_id', Target ='target_id', Value ='value', NodeID ='label', fontSize =16, colourScale=my_color, iterations =0)# Add a titlenet_with_title =prependContent(net, tags$b(HTML('Injuries and weapon type by offense type in 2022')))net_with_title
Injuries and weapon type by offense type in 2022
Figure 6: This sankey diagram shows the offenses, the weapons used, and the amount of injury caused in 2018 and 2022. The paths between the values show the flow of this amount.